home *** CD-ROM | disk | FTP | other *** search
/ CU Amiga Super CD-ROM 18 / CU Amiga Magazine's Super CD-ROM 18 (1997)(EMAP Images)(GB)[!][issue 1998-01].iso / CUCD / Online / hsc / source / hsclib / skip.c < prev    next >
Encoding:
C/C++ Source or Header  |  1997-11-02  |  34.5 KB  |  1,220 lines

  1. /*
  2.  * This source code is part of hsc, a html-preprocessor,
  3.  * Copyright (C) 1995-1997  Thomas Aglassinger
  4.  *
  5.  * This program is free software; you can redistribute it and/or modify
  6.  * it under the terms of the GNU General Public License as published by
  7.  * the Free Software Foundation; either version 2 of the License, or
  8.  * (at your option) any later version.
  9.  *
  10.  * This program is distributed in the hope that it will be useful,
  11.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13.  * GNU General Public License for more details.
  14.  *
  15.  * You should have received a copy of the GNU General Public License
  16.  * along with this program; if not, write to the Free Software
  17.  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18.  *
  19.  */
  20. /*
  21.  * hsclib/skip.c
  22.  *
  23.  * functions for skipping several things
  24.  *
  25.  * updated: 13-Oct-1997
  26.  * created:  8-Oct-1995
  27.  */
  28.  
  29. #define NOEXTERN_HSCLIB_SKIP_H
  30.  
  31. #include "hsclib/inc_base.h"
  32.  
  33. #include "hsclib/input.h"
  34. #include "hsclib/skip.h"
  35.  
  /*
   * debug output for the skip functions
   *
   * DS(x) executes the statement x only if the debug flag of the
   * current hsc-process is set; it relies on a variable `hp' being
   * in scope where it is used. Without DEBUG it expands to nothing.
   *
   * The debug variant is written as if/else with an empty if-part,
   * so an `else' that follows "DS(...);" in the calling code can
   * never attach itself to the `if' hidden inside the macro.
   */
  #if DEBUG
  #define DS(x) if (!(hp->debug)) { /* debugging disabled */ } else x
  #else
  #define DS(x)
  #endif
  /* prefix for all debug messages of this file */
  #define DHLS "*hsclib* skip: "
  43.  
  44. /*
  45.  * forward references
  46.  */
  47. BOOL skip_expression(HSCPRC * hp, EXPSTR * content, int endmark);
  48.  
  /*
   * append text to content, if it does not point to NULL;
   * used by several functions in this file
   *
   * All these macros rely on a variable `content' being in scope.
   *
   * The single-statement macros are written as if/else with an empty
   * if-part: this way an `else' following e.g. "APP_CONTENT(w);" in
   * the calling code can not attach itself to the `if' hidden inside
   * the macro.
   */
  #define APP_CONTENT(w)      if (!(content)) { } else app_estr(content, (w))
  #define APP_CONTENT_ESTR(s) if (!(content)) { } else estrcat(content, (s))
  #define APP_CONTENT_CH(c)   if (!(content)) { } else app_estrch(content, (c))

  /* append current white spaces & current word of an input file */
  #define APP_CONTENT_CWWS(inpf)                                       \
  {                                                                    \
      APP_CONTENT(infgetcws((inpf)));                                  \
      APP_CONTENT(infgetcw((inpf)));                                   \
  }

  /* append expstring and clear it afterwards;
   * the string is cleared even if content is NULL */
  #define APP_CONTENT_ESTR_CLR(s)                                      \
  {                                                                    \
      APP_CONTENT_ESTR((s));                                           \
      clr_estr((s));                                                   \
  }
  66.  
  67. /* a small debugging-function to print a single char
  68.  * in hex, dez and, if useful,  ascii-representation */
  69. static VOID dbg_printc(int ch)
  70. {
  71.     fprintf(stderr, "%02x #%03d", ch, ch);
  72.     if (ch > 31)
  73.     {
  74.         fprintf(stderr, " `%c'", (char) ch);
  75.     }
  76.     fprintf(stderr, "\n");
  77. }
  78.  
  79. /*
  80.  * skip_next_lf
  81.  *
  82.  * ignore '\n'
  83.  *
  84.  * params: inpf...input file to read char from
  85.  * result: TRUE if skipped
  86.  */
  87. BOOL skip_next_lf(HSCPRC * hp)
  88. {
  89.     /* TODO: skip white-spaces after linefeed,
  90.      * if COMPACT set; but this has to be performed
  91.      * at another part of the code... */
  92.     /* TODO: what did I mean with the above??? */
  93.     INFILE *inpf = hp->inpf;
  94.  
  95.     int nc = infgetc(inpf);
  96.  
  97.     /* handle "\r\n", single "\r" and single "\n" */
  98.     if (nc == '\r')
  99.     {
  100.         nc = infgetc(inpf);
  101.     }
  102.     if (nc != '\n')
  103.         inungetc(nc, inpf);
  104.  
  105.     return ((BOOL) (nc == EOF));
  106. }
  107.  
  108. static BOOL eot_reached(HSCPRC * hp, BYTE * state)
  109. {
  110.     INFILE *inpf = hp->inpf;
  111.     STRPTR nw = infgetw(inpf);
  112.  
  113.     if (nw)
  114.     {
  115.         switch (*state)
  116.         {
  117.         case TGST_TAG:
  118.             if (!strcmp(nw, "\""))
  119.                 *state = TGST_DQUOTE;
  120.             else if (!strcmp(nw, "'"))
  121.                 *state = TGST_QUOTE;
  122. /* TODO: skip references & expressions */
  123. #if 0
  124.             else if (!strcmp(nw, "<"))
  125.                 *state = TGST_REF;
  126. #endif
  127.             else if (!strcmp(nw, ">"))
  128.                 *state = TGST_END;
  129.             break;
  130.  
  131.         case TGST_REF:
  132.         case TGST_QUOTE:
  133.         case TGST_DQUOTE:
  134.  
  135.             if (strcmp(nw, "\n"))
  136.             {
  137.                 switch (*state)
  138.                 {
  139.  
  140.                 case TGST_REF:
  141.                     if (!strcmp(nw, ">"))
  142.                         *state = TGST_TAG;
  143.                     break;
  144.  
  145.                 case TGST_QUOTE:
  146.                     if (!strcmp(nw, "'"))
  147.                         *state = TGST_TAG;
  148.                     break;
  149.  
  150.                 case TGST_DQUOTE:
  151.                     if (!strcmp(nw, "\""))
  152.                         *state = TGST_TAG;
  153.                     break;
  154.                 }
  155.             }
  156.             else
  157.             {
  158.                 /* unexpected end of line */
  159.                 hsc_msg_eol(hp);
  160.                 *state = TGST_TAG;      /* go on reading inside tag */
  161.             }
  162.  
  163.             break;
  164.         }
  165.     }
  166.     else
  167.     {
  168.         hsc_msg_eof(hp, "`>' expected");
  169.         *state = TGST_ERR;
  170.     }
  171.  
  172.     return ((BOOL) ((*state == TGST_END) || (*state == TGST_ERR)));
  173. }
  174.  
  175. /*
  176.  * skip_until_eot_args
  177.  *
  178.  * skip until end of tag reached,
  179.  * with user definable status vars
  180.  *
  181.  * params: inpf.....input file
  182.  *         quote....status for quote (TRUE=inside quote)
  183.  *         dquote...status for double quote
  184.  *         argattr..status for quote
  185.  * result: TRUE, if no fatal error
  186.  * errors: return FALSE
  187.  */
  188. static BOOL skip_until_eot_state(HSCPRC * hp, BYTE * state, EXPSTR * logstr)
  189. {
  190.     INFILE *inpf = hp->inpf;
  191.  
  192.     while (!eot_reached(hp, state))
  193.         if (logstr)
  194.         {
  195.             app_estr(logstr, infgetcws(inpf));
  196.             app_estr(logstr, infgetcw(inpf));
  197.         }
  198.  
  199.     /* append ">" */
  200.     if (logstr)
  201.     {
  202.         app_estr(logstr, infgetcws(inpf));
  203.         app_estr(logstr, infgetcw(inpf));
  204.     }
  205.  
  206.     return ((BOOL) ! (hp->fatal));
  207. }
  208.  
  209. /*
  210.  * skip_until_eot
  211.  *
  212.  * skip until end of tag reached
  213.  *
  214.  * params: inpf..input file
  215.  * result: TRUE, if no fatal error
  216.  * errors: return FALSE
  217.  */
  218. BOOL skip_until_eot(HSCPRC * hp, EXPSTR * logstr)
  219. {
  220.     BYTE state = TGST_TAG;
  221.  
  222.     return (skip_until_eot_state(hp, &state, logstr));
  223. }
  224.  
  225. /*
  226.  *-----------------
  227.  * skip comment
  228.  *-----------------
  229.  */
  230.  
  231. /*
  232.  * eoc_reched
  233.  *
  234.  * check if end of an hsc-comment is reached
  235.  *
  236.  * params:
  237.  *   inpf...where to read next word from
  238.  *   state..state var; has to be initiales by
  239.  *          calling func with CMST_TEXT
  240.  *   nest...comment netsing counter; has to be
  241.  *          initiales by calling func with 0
  242.  * result: TRUE, if end of comment reached
  243.  */
  244. BOOL eoc_reached(HSCPRC * hp, BYTE * state, LONG * nest)
  245. {
  246.     INFILE *inpf = hp->inpf;
  247.     STRPTR nw = infgetw(inpf);
  248.  
  249.     if (nw)
  250.     {
  251.         switch (*state)
  252.         {
  253.         case CMST_TEXT:
  254.             if (!strcmp(nw, "*"))
  255.                 *state = CMST_STAR;
  256.             else if (!strcmp(nw, "<"))
  257.                 *state = CMST_TAG;
  258.             break;
  259.  
  260.         case CMST_STAR:
  261.             if (!strcmp(nw, "*"))
  262.                 *state = CMST_STAR;
  263.             else if (!strcmp(nw, "<"))
  264.                 *state = CMST_TAG;
  265.             else if (!strcmp(nw, ">"))
  266.                 if (*nest)
  267.                 {
  268.                     (*nest)--;
  269.                     *state = CMST_TEXT;
  270.                 }
  271.                 else
  272.                     *state = CMST_END;
  273.  
  274.             break;
  275.  
  276.         case CMST_TAG:
  277.             if (!strcmp(nw, "<"))
  278.                 *state = CMST_TAG;
  279.             else
  280.             {
  281.                 if (!strcmp(nw, "*"))
  282.                     (*nest)++;
  283.                 *state = CMST_TEXT;
  284.             }
  285.             break;
  286.         }
  287.     }
  288.     else
  289.     {
  290.         hsc_msg_eof(hp, "missing end of comment (\"*>\")");
  291.         *state = CMST_ERR;
  292.     }
  293.  
  294.     return ((BOOL) ((*state == CMST_END) || (*state == CMST_ERR)));
  295. }
  296.  
  297. /*
  298.  * skip_hsc_comment
  299.  *
  300.  * skip text until '*>' occures;
  301.  * nested commets are supported
  302.  *
  303.  * params:
  304.  *  hp           hsc-process to work with
  305.  *  content      string where to store skipped text; NULL, if text should
  306.  *               not be stored (which is faster)
  307.  * result:
  308.  *  TRUE, if no fatal error occured
  309.  */
  310. BOOL skip_hsc_comment(HSCPRC * hp, EXPSTR * content)
  311. {
  312.     INFILE *inpf = hp->inpf;
  313.     int ch = infgetc(inpf);     /* read next char */
  314.     BYTE state = CMST_TEXT;     /* parser state */
  315.     LONG nesting = 0;           /* nesting counter */
  316.     BOOL end = FALSE;           /* flag: end of comment reached? */
  317.  
  318.     DS(fprintf(stderr, DHLS "skip_hsc_comment\n"));
  319.     while ((ch != EOF) && (!end))
  320.     {
  321. #if 0                           /* optional debugging */
  322.         STRPTR state_str = "UNKN";
  323.  
  324.         switch (state)
  325.         {
  326.         case CMST_TEXT:
  327.             state_str = "text";
  328.             break;
  329.         case CMST_TAG:
  330.             state_str = "tag ";
  331.             break;
  332.         case CMST_STAR:
  333.             state_str = "star";
  334.             break;
  335.         }
  336.  
  337.         fprintf(stderr, DHL "  st=%s  nc=%02x #%03d", state_str, ch, ch);
  338.         if (ch > 31)
  339.         {
  340.             fprintf(stderr, " '%c'", (char) ch);
  341.         }
  342.         fprintf(stderr, "\n");
  343. #endif
  344.  
  345.         /* append current char to content */
  346.         APP_CONTENT_CH(ch);
  347.  
  348.         /* handle current char */
  349.         switch (state)
  350.         {
  351.         case CMST_TEXT:
  352.             if (ch == '*')
  353.             {
  354.                 state = CMST_STAR;
  355.             }
  356.             else if (ch == '<')
  357.             {
  358.                 state = CMST_TAG;
  359.             }
  360.             break;
  361.  
  362.         case CMST_STAR:
  363.             if (ch == '*')
  364.             {
  365.                 state = CMST_STAR;
  366.             }
  367.             else if (ch == '<')
  368.             {
  369.                 state = CMST_TAG;
  370.             }
  371.             else if (ch == '>')
  372.             {
  373.                 if (nesting)
  374.                 {
  375.                     DS(fprintf(stderr,
  376.                              DHLS "  nested comment-end (%ld)\n", nesting));
  377.                     nesting--;
  378.                     state = CMST_TEXT;
  379.                 }
  380.                 else
  381.                 {
  382.                     end = TRUE;
  383.                 }
  384.             }
  385.  
  386.             break;
  387.  
  388.         case CMST_TAG:
  389.             if (ch == '<')
  390.                 state = CMST_TAG;
  391.             else
  392.             {
  393.                 if (ch == '*')
  394.                 {
  395.                     nesting++;
  396.                     DS(fprintf(stderr,
  397.                              DHLS "  nested comment-tag (%ld)\n", nesting));
  398.                 }
  399.                 state = CMST_TEXT;
  400.             }
  401.             break;
  402.         }
  403.  
  404.         /* read next char */
  405.         if (!end)
  406.         {
  407.             ch = infgetc(inpf);
  408.         }
  409.     }
  410.  
  411.     /* handle unexpected end-of-file */
  412.     if (ch == EOF)
  413.     {
  414.         hsc_msg_eof(hp, "missing end of comment (\"*>\")");
  415.     }
  416.  
  417.     return ((BOOL) ! (hp->fatal));
  418. }
  419.  
  420. /*
  421.  * skip_hsc_verbatim
  422.  *
  423.  * skip text until '|>' occures; nesting is not supported
  424.  *
  425.  * params:
  426.  *  hp           hsc-process to work with
  427.  *  content      string where to store skipped text; NULL, if text should
  428.  *               not be stored (which is faster)
  429.  * result:
  430.  *  TRUE, if no fatal error occured
  431.  */
  432. BOOL skip_hsc_verbatim(HSCPRC * hp, EXPSTR * content)
  433. {
  434. #define VBST_TEXT 1
  435. #define VBST_VBAR 2
  436. #define VBST_END  3
  437.  
  438.     INFILE *inpf = hp->inpf;
  439.     int ch = infgetc(inpf);     /* read next char */
  440.     BYTE state = VBST_TEXT;     /* parser state */
  441.  
  442.     while ((ch != EOF) && (state != VBST_END))
  443.     {
  444.         /* append current char to content */
  445.         APP_CONTENT_CH(ch);
  446.  
  447.         /* handle current char */
  448.         switch (state)
  449.         {
  450.         case VBST_TEXT:
  451.             if (ch == (HSC_VERBATIM_STR[0]))
  452.             {
  453.                 state = VBST_VBAR;
  454.             }
  455.             break;
  456.         case VBST_VBAR:
  457.             if (ch == '>')
  458.             {
  459.                 state = VBST_END;
  460.             }
  461.             else if (ch == (HSC_VERBATIM_STR[0]))
  462.             {
  463.                 state = VBST_VBAR;
  464.             }
  465.             else
  466.             {
  467.                 state = VBST_TEXT;
  468.             }
  469.             break;
  470.         default:
  471.             panic("unhandled state");
  472.             state = VBST_END;
  473.             break;
  474.         }
  475.  
  476.         /* read next char */
  477.         if (state != VBST_END)
  478.         {
  479.             ch = infgetc(inpf);
  480.         }
  481.     }
  482.  
  483.     /* handle unexpected end-of-file */
  484.     if (ch == EOF)
  485.     {
  486.         hsc_msg_eof(hp, "missing end verbatim section \"<|..|>\"");
  487.     }
  488.  
  489.     return ((BOOL) ! (hp->fatal));
  490. }
  491.  
  492. /*
  493.  * skip_sgml_special
  494.  *
  495.  * skip SGML special commands (beginning with "<!":
  496.  * - checks, if first two chars are "--"; if so,
  497.  *   it will treat input as SGML comment
  498.  *
  499.  * params:
  500.  *  hp           hsc-process to work with
  501.  *  content      string where to store skipped text; NULL, if text should
  502.  *               not be stored (which is faster)
  503.  * result:
  504.  *  TRUE, if no fatal error occured
  505.  */
  506.  
  507. /* display message "linefeed inside sgml-comment" */
  508. static VOID msg_lf_in_comment(HSCPRC * hp)
  509. {
  510.     hsc_message(hp, MSG_LF_IN_COMMENT,
  511.                 "line feed inside sgml-comment");
  512. }
  513.  
  514. BOOL skip_sgml_special(HSCPRC * hp, EXPSTR * content)
  515. {
  516.     INFILE *inpf = hp->inpf;
  517.     int ch = infgetc(inpf);     /* read next char */
  518.     int ch_prev = EOF;
  519.     BOOL end = FALSE;           /* flag: end of comment reached? */
  520.  
  521.     if (ch == '>')
  522.     {
  523.         hsc_message(hp, MSG_ZERO_COMMENT, "empty sgml comment");
  524.         end = TRUE;
  525.     }
  526.     else if (ch == '-')
  527.     {
  528.         ch_prev = ch;
  529.         ch = infgetc(inpf);
  530.         if (ch == '-')
  531.         {
  532.             BOOL inside_comment = TRUE;
  533.             BOOL warned_text = FALSE;
  534.  
  535.             DS(fprintf(stderr, DHLS "skip sgml comment\n"));
  536.             APP_CONTENT_CH(ch_prev);
  537.             APP_CONTENT_CH(ch);
  538.  
  539.             ch_prev = EOF;
  540.             ch = infgetc(inpf);
  541.  
  542.             while (!end && (ch != EOF))
  543.             {
  544.                 /* append current char to content */
  545.                 APP_CONTENT_CH(ch);
  546.  
  547.                 if ((ch == '-') && (ch_prev == '-'))
  548.                 {
  549.                     inside_comment = !inside_comment;
  550.                     warned_text = FALSE;
  551.                     ch_prev = EOF;
  552.                 }
  553.                 else if (ch == '-')
  554.                 {
  555.                     ch_prev = '-';
  556.                 }
  557.                 else if (ch == '\r')
  558.                 {
  559.                     ch_prev = '\r';
  560.                     msg_lf_in_comment(hp);
  561.                 }
  562.                 else
  563.                 {
  564.                     if (ch == '\n')
  565.                     {
  566.                         if (ch_prev != '\r')
  567.                         {
  568.                             msg_lf_in_comment(hp);
  569.                         }
  570.                     }
  571.  
  572.                     ch_prev = EOF;
  573.                     if (ch == '>')
  574.                     {
  575.                         if (inside_comment)
  576.                         {
  577.                             hsc_message(hp, MSG_GT_IN_COMMENT,
  578.                                         "%q inside sgml-comment", ">");
  579.                         }
  580.                         else
  581.                         {
  582.                             end = TRUE;
  583.                         }
  584.                     }
  585.                     else
  586.                     {
  587.                         if (!inside_comment && !warned_text)
  588.                         {
  589.                             hsc_message(hp, MSG_TEXT_IN_COMMENT,
  590.                                         "text outside sgml-comment context");
  591.                         }
  592.                         warned_text = TRUE;
  593.                     }
  594.                 }
  595.  
  596.                 if (!end)
  597.                 {
  598.                     /* read next char */
  599.                     ch = infgetc(inpf);
  600.                 }
  601.             }
  602.  
  603.             /* push back last char */
  604.             if (!end && (ch != EOF))
  605.             {
  606.                 inungetc(ch, inpf);
  607.             }
  608.         }
  609.         else
  610.         {
  611.             /* push back chars read until yet */
  612.             inungetc(ch, inpf);
  613.             inungetc(ch_prev, inpf);
  614.  
  615.             ch_prev = EOF;
  616.         }
  617.     }
  618.  
  619.     /* skip other "!"-tags (SSI and that bullshit) */
  620.     if (!end)
  621.     {
  622.         DS(fprintf(stderr, DHLS "skip sgml special\n"));
  623.  
  624.         APP_CONTENT_CH(ch);
  625.  
  626.         do
  627.         {
  628.             ch = infgetc(inpf);
  629.             if (ch != EOF)
  630.             {
  631.                 APP_CONTENT_CH(ch);
  632.                 DS(
  633.                       {
  634.                       fprintf(stderr, DHLS "  word starting with: ");
  635.                       dbg_printc(ch);
  636.                       fprintf(stderr, "\n");
  637.                       }
  638.                 );
  639.  
  640.                 if (ch == '>')
  641.                 {
  642.                     end = TRUE;
  643.                 }
  644.                 else
  645.                 {
  646.                     skip_expression(hp, content, ch);
  647.                 }
  648.             }
  649.         }
  650.         while ((ch != EOF) && !end);
  651.     }
  652.  
  653.     /* handle unexpected end-of-file */
  654.     if (ch == EOF)
  655.     {
  656.         hsc_msg_eof(hp, "missing end of sgml special tag \"<!..>\"");
  657.     }
  658.  
  659.     return ((BOOL) ! (hp->fatal));
  660. }
  661.  
  662. /*
  663.  * skip_expression
  664.  *
  665.  * skips expressions, string constants, functions calls
  666.  *
  667.  * params:
  668.  *  hp           hsc-process to work with
  669.  *  content      string where to store skipped text; NULL, if text should
  670.  *               not be stored (which is faster)
  671.  *  endmark      char that marks end-of-expression (see note below)
  672.  * result:
  673.  *  TRUE, if no fatal error occured
  674.  *
  675.  * NOTE on endmark:
  676.  *  " .............. skip until next "
  677.  *  ' .............. skip until next '
  678.  *  ` .............. skip until next `
  679.  *  ( .............. skip until next ), or recursively skip subexpression,
  680.  *                   if one of the above shows up before
  681.  *  anything else .. skip until white-space or ">"
  682.  *
  683.  * NOTE:
  684.  *  internally, endmark='(' is immediately converted to endmark=')'
  685.  */
  686.  
  687. /* decides, if a char is a supported endmark character */
  688. static BOOL is_endmark(int ch)
  689. {
  690.     BOOL it_is = FALSE;
  691.     if (strchr("(`'\"", ch))
  692.     {
  693.         it_is = TRUE;
  694.     }
  695.     return (it_is);
  696. }
  697.  
  /*
   * some debug macros
   *
   * DBG_CH(ch) displays a char using dbg_printc(), if debugging is
   * enabled (see DS()). To disable it for this file, change the
   * condition below to 0; both branches define the same macro, so the
   * code using it compiles either way.
   *
   * NOTE: the definition already ends with a semicolon; this is kept,
   * so "DBG_CH(ch);" expands to an additional empty statement.
   */
  #if 1
  #define DBG_CH(ch)                               \
      DS(                                          \
            {                                      \
            fprintf(stderr, DHLS "  ch=");         \
            dbg_printc((ch));                      \
            }                                      \
      );
  #else
  #define DBG_CH(ch)              /* nufin */
  #endif
  710.  
  711. BOOL skip_expression(HSCPRC * hp, EXPSTR * content, int endmark)
  712. {
  713. #define IS_ENDMARK(x) ((BOOL)
  714.     BOOL quit = FALSE;
  715.     BOOL usual_endmark = is_endmark(endmark);
  716.     int ch = EOF;
  717.  
  718.     DS(fprintf(stderr, DHLS "  skip expression, end=`%c'\n", endmark));
  719.  
  720.     if (endmark == '(')
  721.     {
  722.         endmark = ')';
  723.     }
  724.  
  725.     do
  726.     {
  727.         ch = infgetc(hp->inpf);
  728.  
  729.         if (!usual_endmark)
  730.         {
  731.             /* skip until white-space or ">" */
  732.             if (hsc_whtspc(ch) || (ch == '>'))
  733.             {
  734.                 inungetc(ch, hp->inpf);
  735.                 quit = TRUE;
  736.             }
  737.             else
  738.             {
  739.                 APP_CONTENT_CH(ch);
  740.                 DBG_CH(ch);
  741.             }
  742.         }
  743.         else if (endmark == ')')
  744.         {
  745.             APP_CONTENT_CH(ch);
  746.             if (is_endmark(ch))
  747.             {
  748.                 DS(fprintf(stderr, DHLS "  skip sub-expression\n"));
  749.                 skip_expression(hp, content, ch);
  750.             }
  751.             else if (ch == endmark)
  752.             {
  753.                 quit = TRUE;
  754.             }
  755.             else
  756.             {
  757.                 DBG_CH(ch);
  758.             }
  759.         }
  760.         else
  761.         {
  762.             APP_CONTENT_CH(ch);
  763.             if (ch == endmark)
  764.             {
  765.                 quit = TRUE;
  766.             }
  767.             else
  768.             {
  769.                 /* do nufin, just append normal char to expression */
  770.                 DBG_CH(ch);
  771.             }
  772.         }
  773.     }
  774.     while (!quit && !(ch == EOF) && !(hp->fatal));
  775.  
  776.     /* handle unexpected end-of-file */
  777.     if (ch == EOF)
  778.     {
  779.         EXPSTR *expected = init_estr(0);
  780.         if (usual_endmark)
  781.         {
  782.             app_estrch(expected, '`');
  783.             app_estrch(expected, endmark);
  784.             app_estrch(expected, '\'');
  785.         }
  786.         else
  787.         {
  788.             app_estr(expected, "white space or `>'");
  789.         }
  790.         app_estr(expected, " expected");
  791.         hsc_msg_eof(hp, estr2str(expected));
  792.         del_estr(expected);
  793.     }
  794.  
  795.     return ((BOOL) ! (hp->fatal));
  796. }
  797.  
  798. /*
  799.  * skip_tag_attribs
  800.  *
  801.  * skip tag attributes, until ">" shows up
  802.  *
  803.  * params:
  804.  *  hp           hsc-process to work with
  805.  *  content      string where to store skipped text; NULL, if text should
  806.  *               not be stored (which is faster)
  807.  * result:
  808.  *  TRUE, if no fatal error occured
  809.  */
  810. #define STATE_TAGATTR        10 /* parsing tag-attribs */
  811. #define STATE_TAGATTR_EQ     11 /* "=" inside tag */
  812.  
  813. BOOL skip_tag_attribs(HSCPRC * hp, EXPSTR * content)
  814. {
  815.     /* TODO: on illegal attr name, abort wth message */
  816.     /* TODO: on "\n" in value, display message */
  817.     /* TODO: conditional assignments */
  818.     UBYTE state = STATE_TAGATTR;
  819.     INFILE *inpf = hp->inpf;    /* input file */
  820.     STRPTR nw = NULL;
  821.     BOOL quit = FALSE;          /* flag: exit from skipping */
  822.  
  823.     do
  824.     {
  825.         /* get next word  or attribute name */
  826.         if (state == STATE_TAGATTR)
  827.         {
  828.             nw = infget_attrid(hp);
  829.         }
  830.         else
  831.         {
  832.             nw = infgetw(inpf);
  833.         }
  834.  
  835.         if (nw)
  836.         {
  837.             app_estr(content, infgetcws(inpf));
  838.             app_estr(content, infgetcw(inpf));
  839.  
  840.             switch (state)
  841.             {
  842.             case STATE_TAGATTR:
  843.                 {
  844.                     if (!strcmp(nw, "="))
  845.                     {
  846.                         /* normal assignment */
  847.                         state = STATE_TAGATTR_EQ;
  848.                     }
  849.                     else if (!strcmp(nw, "?"))
  850.                     {
  851.                         /* conditional assignment:
  852.                          * just check for succeeding "=",
  853.                          * and push it back into input stream */
  854.                         parse_eq(hp);
  855.                         inungetcw(inpf);
  856.                     }
  857.                     else if (!strcmp(nw, ">"))
  858.                     {
  859.                         DS(fprintf(stderr, DHLS "end-of-tag-call\n"));
  860.                         quit = TRUE;
  861.                     }
  862.                     else
  863.                     {
  864.                         /* just skipped a boolean attribute;
  865.                          * nufin to do about it */
  866.                     }
  867.                     break;
  868.                 }
  869.  
  870.             case STATE_TAGATTR_EQ:
  871.                 skip_expression(hp, content, nw[0]);
  872.                 DS(fprintf(stderr, DHLS "end-of-tag-expression\n"));
  873.                 state = STATE_TAGATTR;
  874.                 break;
  875.  
  876.                 /* unhandled state */
  877.             default:
  878.                 panic("unhandled state");
  879.                 break;
  880.             }
  881.         }
  882.     }
  883.     while (nw && !quit && !(hp->fatal));
  884.  
  885.     return ((BOOL) ! (hp->fatal));
  886. }
  887.  
  /*
   * skip_until_tag
   *
   * skip everything, until a specific tag (one of tagstoplist or tagnest)
   * is found. if tagnest occurs as a start tag, another instance of the
   * corresponding end tag has to occur to end the skipping operation.
   *
   * params:
   *  hp           hsc-process to work with
   *  content      string where to store skipped text; NULL, if text should
   *               not be stored (which is faster)
   *  tagfound     destination string that will store the name of the tag
   *               that ended the skipping (eg "$else"); if this string is
   *               NULL, it will be ignored
   *  tagstoplist  list of tags to stop on, separated with vertical bars `|'
   *               eg. "$else|$elseif"
   *  tagnest      single tag, that maintains a nesting-counter, depending
   *               on whether it occurs as a start-tag or not; if the
   *               nesting-counter is 0 and it occurs as a stop-tag, it
   *               will also stop skipping (eg "$if")
   *  option       options for skipping (see hsclib/skip.h):
   *                 SKUT_NO_SKIP_TAGFOUND
   *                   do not skip last tagnest; when reading from the
   *                   input next, it will show up again
   *                 SKUT_NO_CONTENT_TAGFOUND
   *                   do not append last tagnest to content
   *                 SKUT_CLEAR_CONTENT
   *                   clear content before appending anything
   *                 SKUT_NO_ANALYSE_TAGS
   *                   do not analyse tag attributes and special tags;
   *                   this is more or less only used by <$source>
   */
  920. #define STATE_TEXT            1 /* normal text */
  921. #define STATE_TAG             2 /* after "<" */
  922. #define STATE_COMMENT         3 /* inside hsc-comment */
  923. #define STATE_COMMENT_STAR    4 /* inside hsc-comment, after "*" */
  924. #define STATE_TAG_STOP        5 /* found tag in stoplist */
  925. #define STATE_SKIP            6 /* inside `skip section' "<|..|>" */
  926. #define STATE_VBAR            7 /* inside `skip section', after "|" */
  927. #define STATE_ENDTAG          8 /* after end-tagname */
  928.  
  929. #define STATE_COMMENT_TAG    14 /* found "<" inside comment (nest comment) */
  930.  
  931. #define STATE_EXIT_ERROR_EOF 99 /* unexpected eof */
  932.  
  933. BOOL skip_until_tag(HSCPRC * hp, EXPSTR * content, EXPSTR * tagfound, STRPTR tagstoplist, STRPTR tagnest, ULONG option)
  934. {
           /*
            * Words are read from hp->inpf in a loop driven by `state':
            *   STATE_TEXT   - plain text; a "<" switches to STATE_TAG
            *   STATE_TAG    - the word after "<" decides how the tag is handled
            *   STATE_ENDTAG - the word after "</" is compared with tagnest
            *
            * The loop ends when
            *   - an end tag for tagnest is met while nesting is 0,
            *   - a tag from tagstoplist is met while nesting is 0,
            *   - no further word can be read (nw == NULL), or
            *   - hp->fatal is set.
            *
            * The result is FALSE only if the last read returned NULL; in that
            * case hsc_msg_eof() is called with the text "</tagnest> expected".
            */

           /* NOTE: defined empty and not used anywhere in this function */
  935. #define RESET_TAGSTR(init)
  936.     UBYTE state = STATE_TEXT;   /* current scanner state (STATE_xxx) */
  937.     INFILE *inpf = hp->inpf;    /* input file */
  938.     LONG nesting = 0;           /* number of currently open tagnest start-tags */
  939.     STRPTR nw = NULL;           /* word or tag-id read last; NULL ends the loop */
  940.     BOOL quit = FALSE;          /* flag: exit from skipping */
  941.     EXPSTR *tagstr = init_estr(128);    /* text of current tag */
  942. 
  943.     /* clear result-var tagfound, if passed */
  944.     if (tagfound)
  945.     {
  946.         clr_estr(tagfound);
  947.     }
  948. 
  949.     /* clear content on request */
  950.     if (content && (option & SKUT_CLEAR_CONTENT))
  951.     {
  952.         clr_estr(content);
  953.     }
  954. 
  955.     do
  956.     {
  957.         /* get next word or tag-id:
            * directly after "<" or "</" the input is read with
            * infget_tagid(), in all other cases with infgetw() */
  958.         if ((state != STATE_TAG)
  959.             && (state != STATE_ENDTAG))
  960.         {
  961.             nw = infgetw(inpf);
  962.         }
  963.         else
  964.         {
  965.             nw = infget_tagid(hp);
  966.         }
  967. 
  968.         if (nw)
  969.         {
  970. #if 0
  971.             /* optional debugging stuff:
  972.              * display content and tagstr on every change */
  973.             if (content && estrlen(content))
  974.             {
  975.                 DS(fprintf(stderr, DHLS "  contnt=`%s'\n", estr2str(content)));
  976.             }
  977. 
  978.             if (estrlen(tagstr))
  979.             {
  980.                 DS(fprintf(stderr, DHLS "  tagstr=`%s'\n", estr2str(tagstr)));
  981.             }
  982. #endif
  983. 
  984.             switch (state)
  985.             {
  986.                 /* check if tag starts */
  987.             case STATE_TEXT:
  988.                 if (!strcmp(nw, "<"))
  989.                 {
  990.                     /* add white spaces to content */
  991.                     APP_CONTENT(infgetcws(inpf));
  992.                     /* reset tag string with "<" */
  993.                     set_estr(tagstr, nw);
  994. 
  995.                     state = STATE_TAG;
  996.                 }
  997.                 else
  998.                 {
                           /* plain text: hand it to content and drop any tag
                            * text collected before.
                            * NOTE(review): APP_CONTENT_CWWS is defined outside
                            * this function; presumably it appends the current
                            * word together with its white spaces - confirm */
  999.                     APP_CONTENT_CWWS(inpf);
  1000.                     if (estrlen(tagstr))
  1001.                     {
  1002.                         clr_estr(tagstr);
  1003.                     }
  1004.                 }
  1005.                 break;
  1006. 
  1007.                 /* check which tag it is and how to act */
  1008.             case STATE_TAG:
  1009.                 {
  1010.                     if (!strcmp(nw, "<"))
  1011.                     {
  1012.                         /* this handles constructs like ``<</$source>''
  1013.                          * correctly: the first "<" (held in tagstr) is
                             * treated as text, the second one is read again */
  1014.                         APP_CONTENT_ESTR(tagstr);
  1015. 
  1016.                         /* unget current "<" */
  1017.                         inungetcwws(inpf);
  1018. 
  1019.                         /* switch back to text-parsing */
  1020.                         state = STATE_TEXT;
  1021.                     }
  1022.                     else
  1023.                     {
  1024.                         /* flag: attribs of tag should be skipped
  1025.                          *   (and appended to tagstr) */
  1026.                         BOOL skip_attribs = FALSE;
  1027.                         /* flag: tagstr should be appended to content */
  1028.                         BOOL append_tag = FALSE;
  1029. 
                            /* collect white spaces and the word in tagstr
                             * (infgetcw() presumably yields the word just
                             * read - confirm against hsclib/input) */
  1030.                         app_estr(tagstr, infgetcws(inpf));
  1031.                         app_estr(tagstr, infgetcw(inpf));
  1032. 
  1033.                         /* check for end tag; append_tag stays FALSE, so
                             * the state is not reset below and the end-tag
                             * name is handled by the next loop iteration */
  1034.                         if (!strcmp(nw, "/"))
  1035.                         {
  1036.                             state = STATE_ENDTAG;
  1037.                         }
  1038.                         else if (option & SKUT_NO_ANALYSE_TAGS)
  1039.                         {
  1040.                             /* abort tag scan: start tags are copied to
                                 * content without looking at their name */
  1041.                             append_tag = TRUE;
  1042.                         }
  1043.                         /* check, if hsc-comment reached */
  1044.                         else if (!upstrcmp(nw, HSC_COMMENT_STR))
  1045.                         {
  1046.                             DS(fprintf(stderr, DHLS "hsc-comment\n"));
  1047.                             APP_CONTENT(estr2str(tagstr));
  1048.                             skip_hsc_comment(hp, content);
  1049.                             state = STATE_TEXT;
  1050.                         }
  1051.                         /* check, if hsc-verbatim reached */
  1052.                         else if (!upstrcmp(nw, HSC_VERBATIM_STR))
  1053.                         {
  1054.                             DS(fprintf(stderr, DHLS "hsc-verbatim\n"));
  1055.                             APP_CONTENT(estr2str(tagstr));
  1056.                             skip_hsc_verbatim(hp, content);
  1057.                             state = STATE_TEXT;
  1058.                         }
  1059.                         /* check, if hsc-source reached */
  1060.                         else if (!upstrcmp(nw, HSC_SOURCE_STR))
  1061.                         {
  1062.                             DS(fprintf(stderr, DHLS "hsc-source\n"));
  1063.                             APP_CONTENT(estr2str(tagstr));
  1064.                             skip_tag_attribs(hp, content);
                                /* recurse: skip up to the end tag of
                                 * HSC_SOURCE_STR without analysing tags;
                                 * the result of the nested call is ignored */
  1065.                             skip_until_tag(hp, content, NULL, NULL,
  1066.                                            HSC_SOURCE_STR,
  1067.                                            SKUT_NO_ANALYSE_TAGS);
  1068.                             state = STATE_TEXT;
  1069.                         }
  1070.                         /* check, if sgml-special-tag reached */
  1071.                         else if (!strcmp(nw, "!"))
  1072.                         {
  1073.                             DS(fprintf(stderr, DHLS "sgml-special\n"));
  1074.                             APP_CONTENT(estr2str(tagstr));
  1075.                             skip_sgml_special(hp, content);
  1076.                             state = STATE_TEXT;
  1077.                         }
  1078.                         else
  1079.                         {
  1080.                             HSCTAG *tag = find_strtag(hp->deftag, nw);
  1081.                             DS(fprintf(stderr, DHLS "tag <%s>\n", nw));
  1082. 
  1083.                             /* check, if nesting-tag should be incr. */
  1084.                             if (!upstrcmp(nw, tagnest))
  1085.                             {
  1086.                                 DS(fprintf(stderr, DHLS "  nest-tag (%ld)\n", nesting));
  1087.                                 nesting++;
  1088.                                 skip_attribs = TRUE;
  1089.                                 append_tag = TRUE;
  1090.                             }
  1091.                             /* check, if stop-tag reached; stop-tags are
                                 * only honoured while nesting is 0. The tag
                                 * text stays in tagstr and is dealt with
                                 * after the loop */
  1092.                             else if (!nesting && tagstoplist
  1093.                               && strenum(nw, tagstoplist, '|', STEN_NOCASE))
  1094.                             {
  1095.                                 DS(fprintf(stderr, DHLS "  stop-tag <%s>\n", nw));
  1096.                                 if (tagfound)
  1097.                                 {
  1098.                                     set_estr(tagfound, nw);
  1099.                                 }
  1100.                                 skip_attribs = TRUE;
  1101.                                 quit = TRUE;
  1102.                             }
  1103.                             /* ignore special tags, switch back to text;
  1104.                              * TODO: this is ugly, eg a
  1105.                              *     <$define sepp:string="<bla">
  1106.                              * will cause trouble */
  1107.                             else if (tag && (tag->option & HT_SPECIAL))
  1108.                             {
  1109.                                 DS(fprintf(stderr, DHLS "  special tag; ignore\n"));
  1110.                                 state = STATE_TEXT;
  1111.                                 append_tag = TRUE;
  1112.                             }
  1113.                             /* for standard tags, just skip attributes */
  1114.                             else
  1115.                             {
  1116.                                 append_tag = TRUE;
  1117.                                 skip_attribs = TRUE;
  1118.                             }
  1119. 
  1120.                             /* skip tag attributes, if requested; they are
                                 * collected in tagstr, not in content */
  1121.                             if (skip_attribs)
  1122.                             {
  1123.                                 skip_tag_attribs(hp, tagstr);
  1124.                                 state = STATE_TEXT;
  1125.                             }
  1126.                         }
  1127. 
  1128.                         /* append tag text to content, if requested */
  1129.                         if (append_tag)
  1130.                         {
  1131.                             DS(fprintf(stderr, DHLS "  append `%s'\n",
  1132.                                        estr2str(tagstr)));
  1133.                             APP_CONTENT(estr2str(tagstr));
  1134.                             state = STATE_TEXT;
  1135.                         }
  1136.                     }
  1137.                     break;
  1138.                 }
  1139. 
                    /* nw holds the name following "</" */
  1140.             case STATE_ENDTAG:
  1141.                 {
  1142.                     DS(fprintf(stderr, DHLS "end tag </%s>\n", nw));
  1143.                     app_estr(tagstr, infgetcws(inpf));
  1144.                     app_estr(tagstr, infgetcw(inpf));
                        /* end tag of tagnest: leave one nesting level, or
                         * stop skipping if no nested tagnest is open */
  1145.                     if (!upstrcmp(nw, tagnest))
  1146.                     {
  1147.                         if (nesting)
  1148.                         {
  1149.                             nesting--;
  1150.                             DS(fprintf(stderr, DHLS "  nest-tag (%ld)\n", nesting));
  1151.                         }
  1152.                         else
  1153.                         {
  1154.                             DS(fprintf(stderr, DHLS "  nest-tag: ending\n"));
  1155.                             quit = TRUE;
  1156.                         }
  1157.                     }
  1158. 
  1159.                     /* skip and check ">";
                         * with SKUT_NO_ANALYSE_TAGS this is only done for
                         * the end tag that terminates skipping */
  1160.                     if (quit || !(option & SKUT_NO_ANALYSE_TAGS))
  1161.                     {
                            /* NOTE: nw is overwritten here; if it becomes
                             * NULL the loop ends and the eof-branch after
                             * the loop is taken */
  1162.                         nw = infgetw(inpf);
  1163.                         if (nw)
  1164.                         {
                                /* remember the word in tagstr, then push it
                                 * back (presumably so that parse_gt() reads
                                 * and checks it itself - confirm) */
  1165.                             app_estr(tagstr, infgetcws(inpf));
  1166.                             app_estr(tagstr, infgetcw(inpf));
  1167.                             inungetcw(inpf);
  1168.                         }
  1169.                         parse_gt(hp);
  1170.                     }
  1171. 
  1172.                     /* append end tag text; for the terminating end tag
                         * this is decided after the loop instead */
  1173.                     if (!quit)
  1174.                     {
  1175.                         APP_CONTENT(estr2str(tagstr));
  1176.                     }
  1177. 
  1178.                     /* no attr for endtag */
  1179.                     state = STATE_TEXT;
  1180. 
  1181.                     break;
  1182.                 }
  1183.                 /* unhandled state */
  1184.             default:
  1185.                 panic("unhandled state");
  1186.                 break;
  1187.             }
  1188.         }
  1189.     }
  1190.     while (nw && !quit && !(hp->fatal));
  1191. 
            /* nw != NULL: the loop was left by quit or because hp->fatal is
             * set; tagstr holds the text of the tag handled last */
  1192.     if (nw)
  1193.     {
  1194.         /* unget end tag: push the collected tag text back to the input */
  1195.         if (option & SKUT_NO_SKIP_TAGFOUND)
  1196.         {
  1197.             inungets(estr2str(tagstr), inpf);
  1198.         }
  1199. 
  1200.         /* add last tag to content */
  1201.         if (!(option & SKUT_NO_CONTENT_TAGFOUND))
  1202.         {
  1203.             APP_CONTENT(estr2str(tagstr));
  1204.         }
  1205.     }
  1206.     else
  1207.     {
  1208.         /* unexpected end-of-file; tagstr is reused to build the
             * message text "</tagnest> expected" */
  1209.         set_estr(tagstr, "</");
  1210.         app_estr(tagstr, tagnest);
  1211.         app_estr(tagstr, "> expected");
  1212.         hsc_msg_eof(hp, estr2str(tagstr));
  1213.     }
  1214. 
  1215.     /* cleanup */
  1216.     del_estr(tagstr);
  1217. 
            /* TRUE unless reading stopped because no further word was available */
  1218.     return ((BOOL) (nw != NULL));
  1219. }
  1220.